New upstream version 1.0.5

author Boyuan Yang <073plan@gmail.com>

Mon, 9 Oct 2017 13:42:06 +0000 (21:42 +0800)

committer Boyuan Yang <073plan@gmail.com>

Mon, 9 Oct 2017 13:42:06 +0000 (21:42 +0800)
author Boyuan Yang <073plan@gmail.com>
Mon, 9 Oct 2017 13:42:06 +0000 (21:42 +0800)
committer Boyuan Yang <073plan@gmail.com>
Mon, 9 Oct 2017 13:42:06 +0000 (21:42 +0800)
diff --git a/CMakeLists.txt b/CMakeLists.txt

index 5ff33eb0711eb73b979053830d798c5038412d2e..2a29415bdffc3ed9b1eb54472343bbe37fd36670 100644 (file)
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -32,7 +32,7 @@ set (PACKAGE_URL https://github.com/BYVoid/Opencc)
  set (PACKAGE_BUGREPORT https://github.com/BYVoid/Opencc/issues)
  set (OPENCC_VERSION_MAJOR 1)
  set (OPENCC_VERSION_MINOR 0)
-set (OPENCC_VERSION_REVISION 4)
+set (OPENCC_VERSION_REVISION 5)
  
  if (CMAKE_BUILD_TYPE MATCHES Debug)
    set (version_suffix .Debug)
@@ -53,10 +53,10 @@ include(CPack)
  
  ######## Windows
  
-if (WIN32)
-  set(CMAKE_SHARED_LIBRARY_PREFIX ${CMAKE_INSTALL_PREFIX})
-  set(CMAKE_STATIC_LIBRARY_PREFIX ${CMAKE_INSTALL_PREFIX})
-endif (WIN32)
+#if (WIN32)
+#  set(CMAKE_SHARED_LIBRARY_PREFIX ${CMAKE_INSTALL_PREFIX})
+#  set(CMAKE_STATIC_LIBRARY_PREFIX ${CMAKE_INSTALL_PREFIX})
+#endif (WIN32)
  
  ######## Mac OS X
  
@@ -68,7 +68,6 @@ set (DIR_PREFIX ${CMAKE_INSTALL_PREFIX})
  set (DIR_INCLUDE ${DIR_PREFIX}/include/)
  set (DIR_SHARE ${DIR_PREFIX}/share/)
  set (DIR_ETC ${DIR_PREFIX}/etc/)
-set (LIB_SUFFIX "")
  set (DIR_LIBRARY ${DIR_PREFIX}/lib${LIB_SUFFIX}/)
  
  if (DEFINED SHARE_INSTALL_PREFIX)
@@ -83,8 +82,12 @@ if (DEFINED SYSCONF_INSTALL_DIR)
    set (DIR_ETC ${SYSCONF_INSTALL_DIR})
  endif (DEFINED SYSCONF_INSTALL_DIR)
  
-set (DIR_SHARE_OPENCC ${DIR_SHARE}opencc/)
-set (DIR_SHARE_LOCALE ${DIR_SHARE}locale/)
+if (DEFINED LIB_INSTALL_DIR)
+  set (DIR_LIBRARY ${LIB_INSTALL_DIR})
+endif (DEFINED LIB_INSTALL_DIR)
+
+set (DIR_SHARE_OPENCC ${DIR_SHARE}/opencc/)
+set (DIR_SHARE_LOCALE ${DIR_SHARE}/locale/)
  
  ######## Configuration
  
@@ -130,7 +133,7 @@ elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
    endif ()
  elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
    add_definitions(
-    /Wall
+    /W4
      /D "_CRT_SECURE_NO_WARNINGS"
    )
  endif()
diff --git a/NEWS.md b/NEWS.md

index 90553e414b3b3c1fd87cd96b0b1a1b26959fe4ae..b3c2fc6cfc3e3a27edae5b7bf33fde46298df4b6 100644 (file)
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,16 @@
  # Change History of OpenCC
  
+## Version 1.0.5
+
+2017年2月6日
+
+* 修正Windows下CMake和Visual Studio的問題。
+* 修正FNV Hash的32位編譯警告。
+* 增加若干臺灣常用詞彙轉換和異體字轉換。
+* 增加和修正若干轉換問題。
+* 加快Node模塊編譯速度。
+* 增加Node模塊的詞典轉換接口和Promise接口。
+
  ## Version 1.0.4
  
  2016年4月1日
diff --git a/README.md b/README.md

index 1ff2e917d46b510d3ef191b924975b3d210946b7..993034073ad93edd751ce2d0a09fc9c866c73238 100644 (file)
--- a/README.md
+++ b/README.md
@@ -1,5 +1,8 @@
  # Open Chinese Convert 開放中文轉換
  
+[ ![Download](https://api.bintray.com/packages/byvoid/opencc/OpenCC/images/download.svg) ](https://bintray.com/byvoid/opencc/OpenCC/_latestVersion)
+[![Build Status](https://travis-ci.org/BYVoid/OpenCC.svg?branch=master)](https://travis-ci.org/BYVoid/OpenCC)
+
  ## Introduction 介紹
  
  Open Chinese Convert (OpenCC, 開放中文轉換) is an opensource project for conversion between Traditional Chinese and Simplified Chinese, supporting character-level conversion, phrase-level conversion, variant conversion and regional idioms among Mainland China, Taiwan and Hong kong.
@@ -28,7 +31,7 @@ Open Chinese Convert (OpenCC, 開放中文轉換) is an opensource project for c
  * [Ubuntu](https://launchpad.net/ubuntu/+source/opencc)
  * [Fedora](https://admin.fedoraproject.org/pkgdb/package/opencc/)
  * [Arch Linux](https://www.archlinux.org/packages/community/x86_64/opencc/)
-* [Mac OS](https://github.com/mxcl/homebrew/blob/master/Library/Formula/opencc.rb)
+* [Mac OS](https://github.com/Homebrew/homebrew-core/blob/master/Formula/opencc.rb)
  * [Node.js](https://npmjs.org/package/opencc)
  
  ## Download 下載
@@ -62,8 +65,6 @@ https://bintray.com/byvoid/opencc/OpenCC
  
  ## Build 編譯
  
-[![Build Status](https://travis-ci.org/BYVoid/OpenCC.svg?branch=master)](https://travis-ci.org/BYVoid/OpenCC)
-
  ### Build with CMake
  
  Linux (gcc 4.6 is required):
@@ -83,15 +84,15 @@ sudo make PREFIX=/usr/local install
  Windows MSYS:
  
  ```
-cmake .. -G "MSYS Makefiles" -DCMAKE_INSTALL_PREFIX="" -DCMAKE_BUILD_TYPE=Release
-make
+cmake -H. -Bbuild -G "MSYS Makefiles" -DCMAKE_INSTALL_PREFIX="path/to/install"
+cmake --build build --config Release --target install
  ```
  
  Windows Visual Studio (2013 or higher required):
  
  ```
-cmake .. -G "Visual Studio 12" -DCMAKE_INSTALL_PREFIX="" -DCMAKE_BUILD_TYPE=Release
-make
+cmake -H. -Bbuild -G"Visual Studio 12" -DCMAKE_INSTALL_PREFIX="path/to/install"
+cmake --build build --config Release --target install
  ```
  
  ### iOS
@@ -165,5 +166,10 @@ https://github.com/BYVoid/OpenCC/blob/master/NEWS.md
  * [Flandre Scarlet](https://github.com/XadillaX)
  * [宋辰文](https://github.com/songchenwen)
  * [iwater](https://github.com/iwater)
+* [Xpol Wan](https://github.com/xpol)
+* [Weihang Lo](https://github.com/weihanglo)
+* [Cychih](https://github.com/pi314)
+* [kyleskimo](https://github.com/kyleskimo)
+* [Ryuan Choi](https://github.com/bunhere)
  
  Please update this list you have contributed OpenCC.
diff --git a/binding.gyp b/binding.gyp

index a86dcb828636b1fe01f1bb95b472b18d32489c60..89ae598c33eece5e8da094c7465ed546504148c9 100644 (file)
--- a/binding.gyp
+++ b/binding.gyp
@@ -2,7 +2,6 @@
    "includes": [
      "node/global.gypi",
      "node/configs.gypi",
-    "node/opencc_dict.gypi",
      "node/dicts.gypi",
      "node/node_binding.gypi",
    ]
diff --git a/data/CMakeLists.txt b/data/CMakeLists.txt

index 2deb7642b9acdbcfdc6e0ac6e214a053b0ab05d2..151677536ca1b0ea80736fb5a00a9d916506f308 100644 (file)
--- a/data/CMakeLists.txt
+++ b/data/CMakeLists.txt
@@ -109,6 +109,8 @@ foreach(DICT ${DICTS})
        ${DICT}.ocd
      COMMENT
        "Building ${DICT}.ocd"
+    COMMAND
+      ${CMAKE_COMMAND} -E copy "$<TARGET_FILE:libopencc>" "$<TARGET_FILE_DIR:${OPENCC_DICT_BIN}>"
      COMMAND
        ${OPENCC_DICT_BIN}
          --input ${DICT_${DICT}_INPUT}
diff --git a/data/dictionary/STCharacters.txt b/data/dictionary/STCharacters.txt

index b937f526864b70cc62b8246a171f35ad119aaf67..6d9ff27d29d451f08856fb3f2cb03e9c8745336c 100644 (file)
--- a/data/dictionary/STCharacters.txt
+++ b/data/dictionary/STCharacters.txt
@@ -3878,9 +3878,21 @@
  𫠒   鱆
  𫠖   𩿅
  𫠜   齯
+𫢸   僤
+𫮃   墠
+𫰛   娙
+𫶇   嵽
+𫷷   廞
+𫸩   彄
+𬀩   暐
  𬬭   錀
  𬬻   鑪
  𬭊   𨧀
  𬭛   𨨏
  𬭳   𨭎
  𬭶   𨭆
+𬶋   鮈
+𬶍   鮀
+𬶏   鮠
+𬶟   鯻
+𬸪   鷭
diff --git a/data/dictionary/STPhrases.txt b/data/dictionary/STPhrases.txt

index 11be47c7c5f1a8f7ef3d722c6c211990b1412fba..28f14fd23c5c88122f54b59b87e007861c1904d0 100644 (file)
--- a/data/dictionary/STPhrases.txt
+++ b/data/dictionary/STPhrases.txt
@@ -938,7 +938,7 @@
  下注解      下註解
  下游 下游
  下游工业   下游工業
-下确界      下确界
+下确界      下確界
  下种 下種
  下笔千言   下筆千言
  下签 下籤
@@ -5008,7 +5008,7 @@
  僵固 僵固
  僵固性      僵固性
  僵尸 殭屍
-僵尸网络   僵屍網絡
+僵尸网络   殭屍網絡
  僵局 僵局
  僵持 僵持
  僵持不下   僵持不下
@@ -6696,7 +6696,7 @@
  几下 幾下
  几世 幾世
  几世纪      幾世紀
-几丝 几絲
+几丝 幾絲
  几两 幾兩
  几个 幾個
  几个人      幾個人
@@ -9427,7 +9427,7 @@
  千里之行   千里之行
  千里命驾   千里命駕
  千里始足下        千里始足下
-千里姻缘一线牵  千裏姻緣一線牽
+千里姻缘一线牵  千里姻緣一線牽
  千里寄鹅毛        千里寄鵝毛
  千里搭长棚        千里搭長棚
  千里犹面   千里猶面
@@ -11505,7 +11505,7 @@
  受制 受制
  受制于      受制於
  受制于人   受制於人
-受命于天   受命于天
+受命于天   受命於天
  受困 受困
  受夠了      受夠了
  受尽 受盡
@@ -11517,7 +11517,7 @@
  受托者      受託者
  受折磨      受折磨
  受用不尽   受用不盡
-受聘于      受聘于
+受聘于      受聘於
  受阻于      受阻於
  受限于      受限於
  受难曲      受難曲
@@ -13242,7 +13242,7 @@
  吊祭 弔祭
  吊稍 吊稍
  吊窗 吊窗
-吊篮 弔籃
+吊篮 吊籃
  吊索 吊索
  吊纸 弔紙
  吊线 吊線
@@ -13254,7 +13254,7 @@
  吊脚儿事   弔腳兒事
  吊腰撒跨   弔腰撒跨
  吊膀子      吊膀子
-吊臂 弔臂
+吊臂 吊臂
  吊衣架      吊衣架
  吊袜 吊襪
  吊袜带      吊襪帶
@@ -15039,7 +15039,7 @@
  哪里 哪裏
  哪里买      哪裏買
  哪里人      哪裏人
-哪里哪里   哪里哪里
+哪里哪里   哪裏哪裏
  哪里摔倒哪里爬  哪裏摔倒哪裏爬
  哭个 哭個
  哭个夠      哭個夠
@@ -17134,7 +17134,7 @@
  大不里士   大不里士
  大丑 大丑
  大专同学   大專同學
-大专杯      大專杯
+大专杯      大專盃
  大业千秋   大業千秋
  大个 大個
  大个儿      大個兒
@@ -18164,7 +18164,7 @@
  威布里吉   威布里吉
  威廉亚历山大     威廉亞歷山大
  威比苏诺   威比蘇諾
-威氏注音法        威氏註音法
+威氏注音法        威氏注音法
  威里斯      威里斯
  威风八面   威風八面
  娇娘 嬌娘
@@ -19185,7 +19185,7 @@
  尼布甲尼撒        尼布甲尼撒
  尼庵 尼庵
  尼采 尼采
-尼龙布      尼龍佈
+尼龙布      尼龍布
  尽世 盡世
  尽义务      盡義務
  尽了 盡了
@@ -20591,7 +20591,7 @@
  干片 乾片
  干犯 干犯
  干犯法      幹犯法
-干球温度   幹球溫度
+干球温度   乾球溫度
  干甚 幹甚
  干甚么      幹甚麼
  干生受      乾生受
@@ -23655,7 +23655,7 @@
  战术轰炸   戰術轟炸
  战栗 戰慄
  战略伙伴   戰略伙伴
-战略防御倡议     戰略防御倡議
+战略防御倡议     戰略防禦倡議
  战胜 戰勝
  战胜国      戰勝國
  战表 戰表
@@ -23706,7 +23706,7 @@
  戴姆勒克莱斯勒  戴姆勒克萊斯勒
  戴希穆克   戴希穆克
  戴瑞克罗   戴瑞克羅
-戴维斯杯   戴維斯杯
+戴维斯杯   戴維斯盃
  戴胜益      戴勝益
  戴胜通      戴勝通
  戴蒙 戴蒙
@@ -24988,7 +24988,7 @@
  抵押借款   抵押借款
  抵挡不了   抵擋不了
  抵牾 牴牾
-抵觸 牴觸
+抵触 牴觸
  抵针 抵針
  抹了 抹了
  抹布 抹布
@@ -27381,7 +27381,7 @@
  新艺术      新藝術
  新艺综合体        新藝綜合體
  新芬党      新芬黨
-新药 新葯
+新药 新藥
  新莺出谷   新鶯出谷
  新规范      新規範
  新闻价值   新聞價值
@@ -29325,7 +29325,7 @@
  杠头 槓頭
  杠子 槓子
  杠杆 槓桿
-杠杆收购   杠杆收購
+杠杆收购   槓桿收購
  杠杠 槓槓
  杠牌 槓牌
  杠着 槓着
@@ -29447,7 +29447,7 @@
  杯酒解怨   杯酒解怨
  杯酒言欢   杯酒言歡
  杯酒释兵权        杯酒釋兵權
-杯里 杯里
+杯里 杯裏
  杯面 杯麪
  杰乐米      傑樂米
  杰伊汉港   傑伊漢港
@@ -30228,7 +30228,7 @@
  核儿 核兒
  核冬天      核冬天
  核准 覈准
-核准的      覈準的
+核准的      覈准的
  核减 覈減
  核出口控制        核出口控制
  核力 核力
@@ -32183,11 +32183,11 @@
  注释 註釋
  注重 注重
  注销 註銷
-注音 註音
+注音 注音
  注音一式   注音一式
  注音字母   注音字母
  注音文      注音文
-注音法      註音法
+注音法      注音法
  注音符号   注音符號
  泪出痛肠   淚出痛腸
  泪如泉涌   淚如泉涌
@@ -36025,7 +36025,7 @@
  石油输出国家组织       石油輸出國家組織
  石油输出国组织  石油輸出國組織
  石灰岩      石灰岩
-石灰岩洞   石灰巖洞
+石灰岩洞   石灰岩洞
  石炭系      石炭系
  石版术      石版術
  石百合      石百合
@@ -36708,7 +36708,7 @@
  种地 種地
  种姓 種姓
  种姓制      種姓制
-种姓制度   種姓製度
+种姓制度   種姓制度
  种子 種子
  种子园      種子園
  种子地      種子地
@@ -37441,7 +37441,7 @@
  签证 簽證
  签证费      簽證費
  签诗 籤詩
-签语饼      簽語餅
+签语饼      籤語餅
  签赌 簽賭
  签赌案      簽賭案
  签赌站      簽賭站
@@ -38298,7 +38298,7 @@
  给于 給於
  给价 給價
  给出 給出
-给我干脆   給我干脆
+给我干脆   給我乾脆
  给药 給藥
  绚丽多彩   絢麗多彩
  绚烂归于平淡     絢爛歸於平淡
@@ -39948,7 +39948,7 @@
  致理技术学院     致理技術學院
  致用 致用
  致电 致電
-致畸 緻畸
+致畸 致畸
  致疑 致疑
  致病 致病
  致病性      致病性
@@ -42598,8 +42598,8 @@
  计穷虑极   計窮慮極
  计算出      計算出
  计算出来   計算出來
-计算机制图        計算機制圖
-计算机集成制造  計算機集成制造
+计算机制图        計算機製圖
+计算机集成制造  計算機集成製造
  计量制      計量制
  订个 訂個
  订了 訂了
@@ -43763,7 +43763,7 @@
  足于 足於
  足协杯      足協盃
  足坛 足壇
-足总杯      足總杯
+足总杯      足總盃
  足食丰衣   足食豐衣
  趸售物价   躉售物價
  趸当 躉當
@@ -46630,7 +46630,7 @@
  阿扎伦卡   阿紮倫卡
  阿扎尼亚   阿扎尼亞
  阿托品      阿托品
-阿拉伯共同市场  阿拉伯共衕市場
+阿拉伯共同市场  阿拉伯共同市場
  阿拉伯联合大公国       阿拉伯聯合大公國
  阿拉伯联合酋长国       阿拉伯聯合酋長國
  阿拉克      阿拉克
@@ -47704,7 +47704,7 @@
  须发文      須發文
  须发皆白   鬚髮皆白
  须发表      須發表
-须后水      須後水
+须后水      鬚後水
  须子 鬚子
  须将有日思无日  須將有日思無日
  须弥 須彌
@@ -48132,7 +48132,7 @@
  香熏疗法   香薰療法
  香皂 香皂
  香菜叶      香菜葉
-香蜡 香
+香蜡 香蠟
  香蜡店      香蠟店
  香蜡纸马   香蠟紙馬
  香蜡铺      香蠟鋪
diff --git a/data/dictionary/TWPhrasesOther.txt b/data/dictionary/TWPhrasesOther.txt

index 2753a7de904bd80d1f94c2f6c4e7f270befe4c2f..090a9a0bb63bfabbcce0d193a22105acc79c199c 100644 (file)
--- a/data/dictionary/TWPhrasesOther.txt
+++ b/data/dictionary/TWPhrasesOther.txt
@@ -7,6 +7,7 @@
  涼菜 冷盤
  砹    砈
  硅    矽
+納米 奈米
  詞組 片語
  蹦極 笨豬跳
  輔音 子音
diff --git a/data/dictionary/TWVariants.txt b/data/dictionary/TWVariants.txt

index 7cd8ece023c79300fe228b8e2488e29e48463b61..ce6ba24e2b9e3479133695fe4c64dc56bfaa98a3 100644 (file)
--- a/data/dictionary/TWVariants.txt
+++ b/data/dictionary/TWVariants.txt
@@ -5,8 +5,10 @@
  嬀    媯
  峯    峰
  幺    么
+擡    抬
  曬    晒
  棱    稜
+檐    簷
  污    汙
  泄    洩
  涌    湧
@@ -20,6 +22,7 @@
  睾    睪
  竈    灶
  糉    粽
+繮    韁
  纔    才
  羣    群
  蔿    蒍
@@ -27,5 +30,7 @@
  裏    裡
  覈    核
  踊    踴
+鉢    缽
  鮎    鯰
  麪    麵
+齶    顎
diff --git a/data/scheme/st_multi.txt b/data/scheme/st_multi.txt

index 4b8a0f41b65c44269395bd70328198761a0cb280..188f0ee055d0b753f74727f63d7cd8d3f9edbf6d 100644 (file)
--- a/data/scheme/st_multi.txt
+++ b/data/scheme/st_multi.txt
@@ -48,7 +48,7 @@
  云    雲 云 「云」意義爲「說」，其餘用「雲」。     人云亦云 雲霧
  仆    僕 仆 「仆」意義爲「跌倒」，讀音pu1，「僕」爲「供人使喚的人」，讀音pu2。        前仆後繼 仆街 奴僕 公僕 風塵僕僕
  舍    舍 捨 「捨」讀作she3，用於「放棄」意義，其餘用「舍」，讀作she4，古文亦同「捨」。     宿舍 村舍 退避三舍 捨弃 舍我其誰 不舍晝夜
-签    籖 簽 「簽」用於動詞，表示「題字題名」，其餘用「籤」。        簽名 簽證 標籤 書籤 牙籤
+签    籤 簽 「簽」用於動詞，表示「題字題名」，其餘用「籤」。        簽名 簽證 標籤 書籤 牙籤
  折    折 摺 與「叠」有關用「摺」，與「斷」有關用「折」。      摺紙 摺扇 存摺 折斷 折腰 折服 打折 損兵折將
  谷    谷 穀 表示「兩山之間」的地域用「谷」，表示農作物時用「穀」。       山谷 稻穀
  几    幾 几 「几」只用作「茶几」。表示「幾乎」、「幾個」意義用「幾」。 茶几 幾乎 幾個
diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt

index 92134041f7453fd0717598a45cfdf2fb64423e8f..c16e2892feaaaa3ddcf4419aec5dfdcf098b4613 100644 (file)
--- a/doc/CMakeLists.txt
+++ b/doc/CMakeLists.txt
@@ -29,7 +29,7 @@ if(BUILD_DOCUMENTATION)
         DIRECTORY
                 ${CMAKE_BINARY_DIR}/doc/html
         DESTINATION
-               ${DIR_SHARE_OPENCC}doc
+               ${DIR_SHARE_OPENCC}/doc
      )
  
      set_directory_properties(
diff --git a/node/binding.cc b/node/binding.cc

index 7ae6bfa0b3dd9f94e1a399f6a2358d08dc73ca3b..b948e55e6ab5e3fe46ec77d0ea0b34ebb3ee3f53 100644 (file)
--- a/node/binding.cc
+++ b/node/binding.cc
@@ -3,6 +3,23 @@
  
  #include "Config.hpp"
  #include "Converter.hpp"
+#include "DictConverter.hpp"
+
+// For faster build
+#include "BinaryDict.cpp"
+#include "Config.cpp"
+#include "Conversion.cpp"
+#include "ConversionChain.cpp"
+#include "Converter.cpp"
+#include "DartsDict.cpp"
+#include "Dict.cpp"
+#include "DictConverter.cpp"
+#include "DictEntry.cpp"
+#include "DictGroup.cpp"
+#include "MaxMatchSegmentation.cpp"
+#include "Segmentation.cpp"
+#include "TextDict.cpp"
+#include "UTF8Util.cpp"
  
  using namespace opencc;
  
@@ -38,12 +55,16 @@ class OpenccBinding : public Nan::ObjectWrap {
      return converter_->Convert(input);
    }
  
+  static NAN_METHOD(Version) {
+    info.GetReturnValue().Set(Nan::New<v8::String>(VERSION).ToLocalChecked());
+  }
+
    static NAN_METHOD(New) {
      OpenccBinding* instance;
  
      try {
        if (info.Length() >= 1 && info[0]->IsString()) {
-        string configFile = ToUtf8String(info[0]);
+        const string configFile = ToUtf8String(info[0]);
          instance = new OpenccBinding(configFile);
        } else {
          instance = new OpenccBinding("s2t.json");
@@ -111,7 +132,7 @@ class OpenccBinding : public Nan::ObjectWrap {
  
      OpenccBinding* instance = Nan::ObjectWrap::Unwrap<OpenccBinding>(info.This());
  
-    string input = ToUtf8String(info[0]);
+    const string input = ToUtf8String(info[0]);
      string output;
      try {
        output = instance->Convert(input);
@@ -124,11 +145,31 @@ class OpenccBinding : public Nan::ObjectWrap {
      info.GetReturnValue().Set(converted);
    }
  
+  static NAN_METHOD(GenerateDict) {
+    if (info.Length() < 4 || !info[0]->IsString() || !info[1]->IsString()
+       || !info[2]->IsString() || !info[3]->IsString()) {
+      Nan::ThrowTypeError("Wrong arguments");
+      return;
+    }
+    const string inputFileName = ToUtf8String(info[0]);
+    const string outputFileName = ToUtf8String(info[1]);
+    const string formatFrom = ToUtf8String(info[2]);
+    const string formatTo = ToUtf8String(info[3]);
+    try {
+      opencc::ConvertDictionary(inputFileName, outputFileName, formatFrom, formatTo);
+    } catch (opencc::Exception& e) {
+      Nan::ThrowError(e.what());
+    }
+  }
+
    static NAN_MODULE_INIT(Init) {
      // Prepare constructor template
      v8::Local<v8::FunctionTemplate> tpl = Nan::New<v8::FunctionTemplate>(OpenccBinding::New);
      tpl->SetClassName(Nan::New("Opencc").ToLocalChecked());
      tpl->InstanceTemplate()->SetInternalFieldCount(1);
+    // Methods
+    Nan::SetMethod(tpl, "version", Version);
+    Nan::SetMethod(tpl, "generateDict", GenerateDict);
      // Prototype
      Nan::SetPrototypeMethod(tpl, "convert", Convert);
      Nan::SetPrototypeMethod(tpl, "convertSync", ConvertSync);
diff --git a/node/demo.js b/node/demo.js

index eba2da527d69516fffb8f7f9a748688071e04c08..c3890aa43dbfc28d577241db242281708c00a08b 100644 (file)
--- a/node/demo.js
+++ b/node/demo.js
@@ -26,16 +26,23 @@
   */
  
  // In your project you should replace './opencc' with 'opencc'
-var OpenCC = require('./opencc');
+const OpenCC = require('./opencc');
+
+console.log('OpenCC version', OpenCC.version);
  
  // Load the default Simplified to Traditional config
-var opencc = new OpenCC('s2t.json');
+const opencc = new OpenCC('s2t.json');
  
  // Sync API
-var converted = opencc.convertSync("汉字");
+const converted = opencc.convertSync("汉字");
  console.log(converted);
  
  // Async API
-opencc.convert("汉字", function (err, converted) {
+opencc.convert("汉字", (err, converted) => {
+  console.log(err, converted);
+});
+
+// Async API with Promise
+opencc.convertPromise("汉字").then(converted => {
    console.log(converted);
  });
diff --git a/node/dict.js b/node/dict.js

new file mode 100644 (file)

index 0000000..59f471f
--- /dev/null
+++ b/node/dict.js
@@ -0,0 +1,6 @@
+const OpenCC = require('./opencc');
+
+const input = process.argv[2];
+const output = process.argv[3];
+
+OpenCC.generateDict(input, output, "text", "ocd");
diff --git a/node/dicts.gypi b/node/dicts.gypi

index 9cf4d064742791e74360920131c43f01ddc9b09f..3b96381eabe41e022591804897b9b5d29ebeec5c 100644 (file)
--- a/node/dicts.gypi
+++ b/node/dicts.gypi
@@ -3,7 +3,7 @@
      "target_name": "dicts",
      "type": "none",
      "variables": {
-      "cmd": "<(PRODUCT_DIR)/opencc_dict",
+      "cmd": "<(module_root_dir)/node/dict.js",
        "dict_merge": "<(module_root_dir)/data/scripts/merge.py",
        "dict_reverse": "<(module_root_dir)/data/scripts/reverse.py",
        "input_prefix": "<(module_root_dir)/data/dictionary/",
@@ -14,57 +14,57 @@
        "variables": {
          "input": "<(input_prefix)STCharacters.txt",
        },
-      "inputs": ["<(cmd)", "<(input)"],
+      "inputs": ["<(input)"],
        "outputs": ["<(output_prefix)STCharacters.ocd"],
-      "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"]
+      "action": ["node", "<(cmd)", "<(input)", "<@(_outputs)"]
      }, {
        "action_name": "STPhrases",
        "variables": {
          "input": "<(input_prefix)STPhrases.txt",
        },
-      "inputs": ["<(cmd)", "<(input)"],
+      "inputs": ["<(input)"],
        "outputs": ["<(output_prefix)STPhrases.ocd"],
-      "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"]
+      "action": ["node", "<(cmd)", "<(input)", "<@(_outputs)"]
      }, {
        "action_name": "TSCharacters",
        "variables": {
          "input": "<(input_prefix)TSCharacters.txt",
        },
-      "inputs": ["<(cmd)", "<(input)"],
+      "inputs": ["<(input)"],
        "outputs": ["<(output_prefix)TSCharacters.ocd"],
-      "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"]
+      "action": ["node", "<(cmd)", "<(input)", "<@(_outputs)"]
      }, {
        "action_name": "TSPhrases",
        "variables": {
          "input": "<(input_prefix)TSPhrases.txt",
        },
-      "inputs": ["<(cmd)", "<(input)"],
+      "inputs": ["<(input)"],
        "outputs": ["<(output_prefix)TSPhrases.ocd"],
-      "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"]
+      "action": ["node", "<(cmd)", "<(input)", "<@(_outputs)"]
      }, {
        "action_name": "TWVariants",
        "variables": {
          "input": "<(input_prefix)TWVariants.txt",
        },
-      "inputs": ["<(cmd)", "<(input)"],
+      "inputs": ["<(input)"],
        "outputs": ["<(output_prefix)TWVariants.ocd"],
-      "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"]
+      "action": ["node", "<(cmd)", "<(input)", "<@(_outputs)"]
      }, {
        "action_name": "TWVariantsRevPhrases",
        "variables": {
          "input": "<(input_prefix)TWVariantsRevPhrases.txt",
        },
-      "inputs": ["<(cmd)", "<(input)"],
+      "inputs": ["<(input)"],
        "outputs": ["<(output_prefix)TWVariantsRevPhrases.ocd"],
-      "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"]
+      "action": ["node", "<(cmd)", "<(input)", "<@(_outputs)"]
      }, {
        "action_name": "JPVariants",
        "variables": {
          "input": "<(input_prefix)JPVariants.txt",
        },
-      "inputs": ["<(cmd)", "<(input)"],
+      "inputs": ["<(input)"],
        "outputs": ["<(output_prefix)JPVariants.ocd"],
-      "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"]
+      "action": ["node", "<(cmd)", "<(input)", "<@(_outputs)"]
      }, {
        "action_name": "TWPhrases.txt",
        "inputs": ["<(cmd)"],
@@ -75,7 +75,7 @@
        "variables": {
          "input": "<(input_prefix)TWVariants.txt",
        },
-      "inputs": ["<(cmd)", "<(input)"],
+      "inputs": ["<(input)"],
        "outputs": ["<(output_prefix)TWVariantsRev.txt"],
        "action": ["python", "<(dict_reverse)", "<(input)", "<@(_outputs)"]
      }, {
@@ -83,7 +83,7 @@
        "variables": {
          "input": "<(output_prefix)TWPhrases.txt",
        },
-      "inputs": ["<(cmd)", "<(input)"],
+      "inputs": ["<(input)"],
        "outputs": ["<(output_prefix)TWPhrasesRev.txt"],
        "action": ["python", "<(dict_reverse)", "<(input)", "<@(_outputs)"]
      }, {
@@ -91,55 +91,55 @@
        "variables": {
          "input": "<(output_prefix)TWPhrases.txt",
        },
-      "inputs": ["<(cmd)", "<(input)"],
+      "inputs": ["<(input)"],
        "outputs": ["<(output_prefix)TWPhrases.ocd"],
-      "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"]
+      "action": ["node", "<(cmd)", "<(input)", "<@(_outputs)"]
      }, {
        "action_name": "TWVariantsRev",
        "variables": {
          "input": "<(output_prefix)TWVariantsRev.txt",
        },
-      "inputs": ["<(cmd)", "<(input)"],
+      "inputs": ["<(input)"],
        "outputs": ["<(output_prefix)TWVariantsRev.ocd"],
-      "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"]
+      "action": ["node", "<(cmd)", "<(input)", "<@(_outputs)"]
      }, {
        "action_name": "TWPhrasesRev",
        "variables": {
          "input": "<(output_prefix)TWPhrasesRev.txt",
        },
-      "inputs": ["<(cmd)", "<(input)"],
+      "inputs": ["<(input)"],
        "outputs": ["<(output_prefix)TWPhrasesRev.ocd"],
-      "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"]
+      "action": ["node", "<(cmd)", "<(input)", "<@(_outputs)"]
      }, {
        "action_name": "HKVariants",
        "variables": {
          "input": "<(input_prefix)HKVariants.txt",
        },
-      "inputs": ["<(cmd)", "<(input)"],
+      "inputs": ["<(input)"],
        "outputs": ["<(output_prefix)HKVariants.ocd"],
-      "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"]
+      "action": ["node", "<(cmd)", "<(input)", "<@(_outputs)"]
      }, {
        "action_name": "HKVariantsPhrases",
        "variables": {
          "input": "<(input_prefix)HKVariantsPhrases.txt",
        },
-      "inputs": ["<(cmd)", "<(input)"],
+      "inputs": ["<(input)"],
        "outputs": ["<(output_prefix)HKVariantsPhrases.ocd"],
-      "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"]
+      "action": ["node", "<(cmd)", "<(input)", "<@(_outputs)"]
      }, {
        "action_name": "HKVariantsRevPhrases",
        "variables": {
          "input": "<(input_prefix)HKVariantsRevPhrases.txt",
        },
-      "inputs": ["<(cmd)", "<(input)"],
+      "inputs": ["<(input)"],
        "outputs": ["<(output_prefix)HKVariantsRevPhrases.ocd"],
-      "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"]
+      "action": ["node", "<(cmd)", "<(input)", "<@(_outputs)"]
      }, {
        "action_name": "HKVariantsRev.txt",
        "variables": {
          "input": "<(input_prefix)HKVariants.txt",
        },
-      "inputs": ["<(cmd)", "<(input)"],
+      "inputs": ["<(input)"],
        "outputs": ["<(output_prefix)HKVariantsRev.txt"],
        "action": ["python", "<(dict_reverse)", "<(input)", "<@(_outputs)"]
      }, {
@@ -147,12 +147,12 @@
        "variables": {
          "input": "<(output_prefix)HKVariantsRev.txt",
        },
-      "inputs": ["<(cmd)", "<(input)"],
+      "inputs": ["<(input)"],
        "outputs": ["<(output_prefix)HKVariantsRev.ocd"],
-      "action": ["<(cmd)", "-i", "<(input)", "-o", "<@(_outputs)", "--from text", "--to ocd"]
+      "action": ["node", "<(cmd)", "<(input)", "<@(_outputs)"]
      }],
      "dependencies": [
-      "opencc_dict"
+      "binding"
      ]
    }]
  }
diff --git a/node/global.gypi b/node/global.gypi

index 433f1b7e2db913b9ff67d862a0b852d45253c13b..5becc15949e9637e92627d5911a0f3bba5eb604e 100644 (file)
--- a/node/global.gypi
+++ b/node/global.gypi
@@ -1,6 +1,6 @@
  {
    "variables": {
-    "opencc_version": "1.0.4"
+    "opencc_version": "1.0.5"
    },
    "target_defaults": {
      "defines": [
diff --git a/node/node_binding.gypi b/node/node_binding.gypi

index 1bb5c6b561b9ff0fd3491c6d4a7c8e80c97118ba..6022195900ee882fa524a5d839edfb6a632430ef 100644 (file)
--- a/node/node_binding.gypi
+++ b/node/node_binding.gypi
@@ -3,19 +3,6 @@
      "target_name": "binding",
      "sources": [
        "../node/binding.cc",
-      "../src/BinaryDict.cpp",
-      "../src/Config.cpp",
-      "../src/Conversion.cpp",
-      "../src/ConversionChain.cpp",
-      "../src/Converter.cpp",
-      "../src/DartsDict.cpp",
-      "../src/Dict.cpp",
-      "../src/DictEntry.cpp",
-      "../src/DictGroup.cpp",
-      "../src/MaxMatchSegmentation.cpp",
-      "../src/Segmentation.cpp",
-      "../src/TextDict.cpp",
-      "../src/UTF8Util.cpp",
      ],
      "include_dirs": [
        "../src",
diff --git a/node/opencc.js b/node/opencc.js

index 4604b9b832475be57ab7d08bf1d4d65c566ea2b6..70f95642659c63c0f7651c6062d98d5d19127c88 100644 (file)
--- a/node/opencc.js
+++ b/node/opencc.js
@@ -54,6 +54,33 @@ var OpenCC = module.exports = function (config) {
    this.handler = new binding.Opencc(config);
  };
  
+/**
+ * The version of OpenCC library.
+ *
+ * @fn OpenCC.version
+ * @memberof OpenCC
+ * @ingroup node_api
+ */
+OpenCC.version = binding.Opencc.version();
+
+/**
+ * Generates dictionary from another format.
+ *
+ * @fn string generateDict(string inputFileName, string outputFileName, string formatFrom, string formatTo)
+ * @memberof OpenCC
+ * @param inputFileName Input dictionary filename.
+ * @param outputFileName Output dictionary filename.
+ * @param formatFrom Input dictionary format.
+ * @param formatTo Output dictionary format.
+ * @return Converted text.
+ * @ingroup node_api
+ */
+OpenCC.generateDict = function(inputFileName, outputFileName,
+    formatFrom, formatTo) {
+  return binding.Opencc.generateDict(inputFileName, outputFileName,
+    formatFrom, formatTo); 
+}
+
  /**
   * Converts input text.
   *
@@ -79,3 +106,22 @@ OpenCC.prototype.convert = function (input, callback) {
  OpenCC.prototype.convertSync = function (input) {
    return this.handler.convertSync(input.toString());
  };
+
+/**
+ * Converts input text asynchronously and returns a Promise.
+ *
+ * @fn Promise convertPromise(string input)
+ * @memberof OpenCC
+ * @param input Input text.
+ * @return The Promise that will yield the converted text.
+ * @ingroup node_api
+ */
+OpenCC.prototype.convertPromise = function (input) {
+  const self = this;
+  return new Promise(function(resolve, reject) {
+    self.handler.convert(input.toString(), function(err, text) {
+      if (err) reject(err);
+      else resolve(text);
+    });
+  });
+};
diff --git a/node/opencc_dict.gypi b/node/opencc_dict.gypi

deleted file mode 100644 (file)

index 879f871..0000000
--- a/node/opencc_dict.gypi
+++ /dev/null
@@ -1,21 +0,0 @@
-{
-  "targets": [{
-    "target_name": "opencc_dict",
-    "type": "executable",
-    "sources": [
-      "../src/BinaryDict.cpp",
-      "../src/DartsDict.cpp",
-      "../src/Dict.cpp",
-      "../src/DictEntry.cpp",
-      "../src/DictGroup.cpp",
-      "../src/TextDict.cpp",
-      "../src/UTF8Util.cpp",
-      "../src/tools/DictConverter.cpp",
-    ],
-    "include_dirs": [
-      "../src",
-      "../deps/darts-clone",
-      "../deps/tclap-1.2.1"
-    ]
-  }]
-}
diff --git a/package.json b/package.json

index f5457984c6ad022012115e841bd6221a34f92b0a..4d7233b393af91544e47a4dd5b406b0644923567 100644 (file)
--- a/package.json
+++ b/package.json
@@ -1,9 +1,9 @@
  {
    "name": "opencc",
-  "version": "1.0.4",
+  "version": "1.0.5",
    "description": "Conversion between Traditional and Simplified Chinese",
    "author": "BYVoid <byvoid@byvoid.com>",
-  "license": "Apache",
+  "license": "Apache-2.0",
    "main": "node/opencc.js",
    "scripts": {
      "test": "mocha -R spec node/test.js"
@@ -27,6 +27,6 @@
      "mocha": "2.2.5"
    },
    "dependencies": {
-    "nan": "^2.2.0"
+    "nan": "^2.5.1"
    }
  }
diff --git a/src/BinaryDict.cpp b/src/BinaryDict.cpp

index 9354fc8bd05cc751e523da7289535851fd255e14..87a215d57bada835c813699cd87f23a2fe7d6ed7 100644 (file)
--- a/src/BinaryDict.cpp
+++ b/src/BinaryDict.cpp
@@ -30,10 +30,10 @@ size_t BinaryDict::KeyMaxLength() const {
  }
  
  void BinaryDict::SerializeToFile(FILE* fp) const {
-  string keyBuffer, valueBuffer;
+  string keyBuf, valueBuf;
    vector<size_t> keyOffsets, valueOffsets;
    size_t keyTotalLength = 0, valueTotalLength = 0;
-  ConstructBuffer(keyBuffer, keyOffsets, keyTotalLength, valueBuffer,
+  ConstructBuffer(keyBuf, keyOffsets, keyTotalLength, valueBuf,
                    valueOffsets, valueTotalLength);
    // Number of items
    size_t numItems = lexicon->Length();
@@ -41,9 +41,9 @@ void BinaryDict::SerializeToFile(FILE* fp) const {
  
    // Data
    fwrite(&keyTotalLength, sizeof(size_t), 1, fp);
-  fwrite(keyBuffer.c_str(), sizeof(char), keyTotalLength, fp);
+  fwrite(keyBuf.c_str(), sizeof(char), keyTotalLength, fp);
    fwrite(&valueTotalLength, sizeof(size_t), 1, fp);
-  fwrite(valueBuffer.c_str(), sizeof(char), valueTotalLength, fp);
+  fwrite(valueBuf.c_str(), sizeof(char), valueTotalLength, fp);
  
    size_t keyCursor = 0, valueCursor = 0;
    for (const DictEntry* entry : *lexicon) {
@@ -131,8 +131,8 @@ BinaryDictPtr BinaryDict::NewFromFile(FILE* fp) {
    return dict;
  }
  
-void BinaryDict::ConstructBuffer(string& keyBuffer, vector<size_t>& keyOffset,
-                                 size_t& keyTotalLength, string& valueBuffer,
+void BinaryDict::ConstructBuffer(string& keyBuf, vector<size_t>& keyOffset,
+                                 size_t& keyTotalLength, string& valueBuf,
                                   vector<size_t>& valueOffset,
                                   size_t& valueTotalLength) const {
    keyTotalLength = 0;
@@ -152,28 +152,28 @@ void BinaryDict::ConstructBuffer(string& keyBuffer, vector<size_t>& keyOffset,
      }
    }
    // Write keys and values to buffers
-  keyBuffer.resize(keyTotalLength, '\0');
-  valueBuffer.resize(valueTotalLength, '\0');
-  char* pKeyBuffer = const_cast<char*>(keyBuffer.c_str());
-  char* pValueBuffer = const_cast<char*>(valueBuffer.c_str());
+  keyBuf.resize(keyTotalLength, '\0');
+  valueBuf.resize(valueTotalLength, '\0');
+  char* pKeyBuffer = const_cast<char*>(keyBuf.c_str());
+  char* pValueBuffer = const_cast<char*>(valueBuf.c_str());
    for (const DictEntry* entry : *lexicon) {
      strcpy(pKeyBuffer, entry->Key());
-    keyOffset.push_back(pKeyBuffer - keyBuffer.c_str());
+    keyOffset.push_back(pKeyBuffer - keyBuf.c_str());
      pKeyBuffer += entry->KeyLength() + 1;
      if (entry->NumValues() == 1) {
        const auto* svEntry = static_cast<const SingleValueDictEntry*>(entry);
        strcpy(pValueBuffer, svEntry->Value());
-      valueOffset.push_back(pValueBuffer - valueBuffer.c_str());
+      valueOffset.push_back(pValueBuffer - valueBuf.c_str());
        pValueBuffer += strlen(svEntry->Value()) + 1;
      } else {
        const auto* mvEntry = static_cast<const MultiValueDictEntry*>(entry);
        for (const auto& value : mvEntry->Values()) {
          strcpy(pValueBuffer, value);
-        valueOffset.push_back(pValueBuffer - valueBuffer.c_str());
+        valueOffset.push_back(pValueBuffer - valueBuf.c_str());
          pValueBuffer += strlen(value) + 1;
        }
      }
    }
-  assert(keyBuffer.c_str() + keyTotalLength == pKeyBuffer);
-  assert(valueBuffer.c_str() + valueTotalLength == pValueBuffer);
+  assert(keyBuf.c_str() + keyTotalLength == pKeyBuffer);
+  assert(valueBuf.c_str() + valueTotalLength == pValueBuffer);
  }
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt

index 68883a22c70202bb1c67aae704548aae9f2f3a41..498a4f6be46a82343e1798ffa79500a58fb4cbe8 100644 (file)
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -16,6 +16,7 @@ set(
    Converter.hpp
    DartsDict.hpp
    Dict.hpp
+  DictConverter.hpp
    DictEntry.hpp
    DictGroup.hpp
    Exception.hpp
@@ -43,6 +44,7 @@ set(
    Converter.cpp
    DartsDict.cpp
    Dict.cpp
+  DictConverter.cpp
    DictEntry.cpp
    DictGroup.cpp
    MaxMatchSegmentation.cpp
@@ -54,7 +56,8 @@ set(
    UTF8Util.cpp
  )
  
-add_library(libopencc ${LIBOPENCC_SOURCES})
+add_library(libopencc ${LIBOPENCC_SOURCES} ${LIBOPENCC_HEADERS})
+source_group(libopencc FILES ${LIBOPENCC_SOURCES} ${LIBOPENCC_HEADERS})
  
  GENERATE_EXPORT_HEADER(
    libopencc
diff --git a/src/Config.cpp b/src/Config.cpp

index 35bc89be7e97ea0cbd0579cf694bb7175df98f0d..f302585d2c0c3f9ddbefeeb014131edc79123a0a 100644 (file)
--- a/src/Config.cpp
+++ b/src/Config.cpp
@@ -94,7 +94,7 @@ public:
    DictPtr ParseDict(const JSONValue& doc) {
      // Required: type
      string type = GetStringProperty(doc, "type");
-    DictPtr dict;
+
      if (type == "group") {
        list<DictPtr> dicts;
        const JSONValue& docs = GetArrayProperty(doc, "dicts");
@@ -114,6 +114,7 @@ public:
        if (cache != nullptr) {
          return cache;
        }
+      DictPtr dict;
        if (type == "text") {
          dict = LoadDictWithPaths<TextDict>(fileName);
        } else if (type == "ocd") {
@@ -231,7 +232,10 @@ ConverterPtr Config::NewFromString(const string& json,
    }
  
    ConfigInternal* impl = (ConfigInternal*)internal;
-  impl->configDirectory = configDirectory;
+  if (configDirectory.back() == '/' || configDirectory.back() == '\\')
+    impl->configDirectory = configDirectory;
+  else
+    impl->configDirectory = configDirectory + '/';
  
    // Required: segmentation
    SegmentationPtr segmentation =
diff --git a/src/ConfigTest.cpp b/src/ConfigTest.cpp

index 080cccf57b62166c2170a6bc4688c78c5342198b..6285f7aee42ed6dcf6b53c35adec764de7f1133c 100644 (file)
--- a/src/ConfigTest.cpp
+++ b/src/ConfigTest.cpp
@@ -57,4 +57,14 @@ TEST_F(ConfigTest, NonexistingPath) {
    }
  }
  
+TEST_F(ConfigTest, NewFromStringWitoutTrailingSlash) {
+  std::ifstream ifs(CONFIG_TEST_PATH);
+  string content(std::istreambuf_iterator<char>(ifs),
+                 (std::istreambuf_iterator<char>()));
+  string pathWithoutTrailingSlash = CMAKE_SOURCE_DIR "/test/config_test";
+
+  const ConverterPtr converter = config.NewFromString(
+      content, pathWithoutTrailingSlash);
+}
+
  } // namespace opencc
diff --git a/src/DartsDict.cpp b/src/DartsDict.cpp

index cfcc2c4bc0a9d814404d4f766c407330a8c01cbc..75f4c3ddf8046855462db7de1a0895602088997d 100644 (file)
--- a/src/DartsDict.cpp
+++ b/src/DartsDict.cpp
@@ -154,7 +154,6 @@ void DartsDict::SerializeToFile(FILE* fp) const {
    fwrite(&dartsSize, sizeof(size_t), 1, fp);
    fwrite(dict.array(), sizeof(char), dartsSize, fp);
  
-  auto internal = this->internal;
    internal->binary.reset(new BinaryDict(lexicon));
    internal->binary->SerializeToFile(fp);
  }
diff --git a/src/Dict.cpp b/src/Dict.cpp

index 755b67c406332ff1cd19d5e46040ba235a2df963..0e3f56d44b3d60136c13b6915e66355af14fbace 100644 (file)
--- a/src/Dict.cpp
+++ b/src/Dict.cpp
@@ -30,7 +30,7 @@ Optional<const DictEntry*> Dict::MatchPrefix(const char* word) const {
      if (!result.IsNull()) {
        return result;
      }
-    len -= UTF8Util::PrevCharLength(wordTruncPtr);
+    len -= static_cast<long>(UTF8Util::PrevCharLength(wordTruncPtr));
    }
    return Optional<const DictEntry*>::Null();
  }
@@ -40,7 +40,7 @@ vector<const DictEntry*> Dict::MatchAllPrefixes(const char* word) const {
    string wordTrunc = UTF8Util::TruncateUTF8(word, KeyMaxLength());
    const char* wordTruncPtr = wordTrunc.c_str() + wordTrunc.length();
    for (long len = static_cast<long>(wordTrunc.length()); len > 0;
-       len -= UTF8Util::PrevCharLength(wordTruncPtr)) {
+       len -= static_cast<long>(UTF8Util::PrevCharLength(wordTruncPtr))) {
      wordTrunc.resize(static_cast<size_t>(len));
      wordTruncPtr = wordTrunc.c_str() + len;
      const Optional<const DictEntry*>& result = Match(wordTrunc.c_str());
diff --git a/src/DictConverter.cpp b/src/DictConverter.cpp

new file mode 100644 (file)

index 0000000..06f37c1
--- /dev/null
+++ b/src/DictConverter.cpp
@@ -0,0 +1,57 @@
+/*
+ * Open Chinese Convert
+ *
+ * Copyright 2010-2017 BYVoid <byvoid@byvoid.com>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DartsDict.hpp"
+#include "DictConverter.hpp"
+#include "TextDict.hpp"
+
+using namespace opencc;
+
+DictPtr LoadDictionary(const string& format, const string& inputFileName) {
+  if (format == "text") {
+    return SerializableDict::NewFromFile<TextDict>(inputFileName);
+  } else if (format == "ocd") {
+    return SerializableDict::NewFromFile<DartsDict>(inputFileName);
+  } else {
+    fprintf(stderr, "Unknown dictionary format: %s\n", format.c_str());
+    exit(2);
+  }
+  return nullptr;
+}
+
+SerializableDictPtr ConvertDict(const string& format,
+                                      const DictPtr dict) {
+  if (format == "text") {
+    return TextDict::NewFromDict(*dict.get());
+  } else if (format == "ocd") {
+    return DartsDict::NewFromDict(*dict.get());
+  } else {
+    fprintf(stderr, "Unknown dictionary format: %s\n", format.c_str());
+    exit(2);
+  }
+  return nullptr;
+}
+
+namespace opencc {
+void ConvertDictionary(const string inputFileName, const string outputFileName,
+                       const string formatFrom, const string formatTo) {
+  DictPtr dictFrom = LoadDictionary(formatFrom, inputFileName);
+  SerializableDictPtr dictTo = ConvertDict(formatTo, dictFrom);
+  dictTo->SerializeToFile(outputFileName);
+}
+}
diff --git a/src/DictConverter.hpp b/src/DictConverter.hpp

new file mode 100644 (file)

index 0000000..f59c5ec
--- /dev/null
+++ b/src/DictConverter.hpp
@@ -0,0 +1,30 @@
+/*
+ * Open Chinese Convert
+ *
+ * Copyright 2010-2017 BYVoid <byvoid@byvoid.com>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "Common.hpp"
+
+namespace opencc {
+/**
+* Converts a dictionary from a format to another.
+* @ingroup opencc_cpp_api
+*/
+void ConvertDictionary(const string inputFileName, const string outputFileName,
+                       const string formatFrom, const string formatTo);
+}
diff --git a/src/DictEntry.hpp b/src/DictEntry.hpp

index ecef4891dd5cef98fa986b9c07e2ea7f2e042555..0a840a68e1a47c381be3f330f4a4f4fdf4b7dba5 100644 (file)
--- a/src/DictEntry.hpp
+++ b/src/DictEntry.hpp
@@ -140,11 +140,11 @@ public:
    size_t NumValues() const { return values.size(); }
  
    vector<const char*> Values() const {
-    vector<const char*> values;
+    vector<const char*> retsult;
      for (const string& value : this->values) {
-      values.push_back(value.c_str());
+      retsult.push_back(value.c_str());
      }
-    return values;
+    return retsult;
    }
  
  private:
diff --git a/src/Exception.hpp b/src/Exception.hpp

index a1134e850a0925f5f8f999cdd53ce772e1fa9195..33cb5d9d9a36ae36fe33e231cebe575fadf0981e 100644 (file)
--- a/src/Exception.hpp
+++ b/src/Exception.hpp
@@ -32,7 +32,7 @@
  
  namespace opencc {
  
-class OPENCC_EXPORT Exception : public std::exception {
+class OPENCC_EXPORT Exception {
  public:
    Exception() {}
  
diff --git a/src/PhraseExtract.cpp b/src/PhraseExtract.cpp

index 17b46d409d957f8109684c0703a9bfdc1ff97908..495ab31ad28fd608648d56dc772357b53b49844b 100644 (file)
--- a/src/PhraseExtract.cpp
+++ b/src/PhraseExtract.cpp
@@ -1,4 +1,4 @@
-/*
+/*
   * Open Chinese Convert
   *
   * Copyright 2015 BYVoid <byvoid@byvoid.com>
@@ -171,8 +171,9 @@ void PhraseExtract::ExtractSuffixes() {
          std::min(static_cast<LengthType>(wordMaxLength + suffixSetLength),
                   text.UTF8Length());
      const UTF8StringSlice& slice = text.Left(suffixLength);
-    suffixes.push_back(UTF8StringSlice8Bit(slice.CString(), slice.UTF8Length(),
-                                           slice.ByteLength()));
+    suffixes.push_back(UTF8StringSlice8Bit(slice.CString(), 
+        static_cast<UTF8StringSlice8Bit::LengthType>(slice.UTF8Length()),
+        static_cast<UTF8StringSlice8Bit::LengthType>(slice.ByteLength())));
    }
    suffixes.shrink_to_fit();
    // Sort suffixes
@@ -189,8 +190,10 @@ void PhraseExtract::ExtractPrefixes() {
          std::min(static_cast<LengthType>(wordMaxLength + prefixSetLength),
                   text.UTF8Length());
      const UTF8StringSlice& slice = text.Right(prefixLength);
-    prefixes.push_back(UTF8StringSlice8Bit(slice.CString(), slice.UTF8Length(),
-                                           slice.ByteLength()));
+    prefixes.push_back(UTF8StringSlice8Bit(slice.CString(),
+        static_cast<UTF8StringSlice8Bit::LengthType>(slice.UTF8Length()),
+        static_cast<UTF8StringSlice8Bit::LengthType>(slice.ByteLength())));
+
    }
    prefixes.shrink_to_fit();
    // Sort suffixes reversely
@@ -206,7 +209,7 @@ void PhraseExtract::CalculateFrequency() {
      ExtractSuffixes();
    }
    for (const auto& suffix : suffixes) {
-    for (size_t i = 1; i <= suffix.UTF8Length() && i <= wordMaxLength; i++) {
+    for (UTF8StringSlice8Bit::LengthType i = 1; i <= suffix.UTF8Length() && i <= wordMaxLength; i++) {
        const UTF8StringSlice8Bit wordCandidate = suffix.Left(i);
        signals->AddKey(wordCandidate).frequency++;
        totalOccurrence++;
@@ -263,6 +266,7 @@ void CalculatePrefixSuffixEntropy(
      const std::function<void(const PhraseExtract::UTF8StringSlice8Bit& word,
                               AdjacentSetType& adjacentSet)>& updateEntropy) {
    AdjacentSetType adjacentSet;
+  auto setLength8Bit = static_cast<PhraseExtract::UTF8StringSlice8Bit::LengthType>(setLength);
    for (PhraseExtract::LengthType length = wordMinLength;
         length <= wordMaxLength; length++) {
      adjacentSet.clear();
@@ -271,19 +275,20 @@ void CalculatePrefixSuffixEntropy(
        if (presuffix.UTF8Length() < length) {
          continue;
        }
+      auto length8Bit = static_cast<PhraseExtract::UTF8StringSlice8Bit::LengthType>(length);
        const auto& wordCandidate =
-          SUFFIX ? presuffix.Left(length) : presuffix.Right(length);
+          SUFFIX ? presuffix.Left(length8Bit) : presuffix.Right(length8Bit);
        if (wordCandidate != lastWord) {
          updateEntropy(lastWord, adjacentSet);
          lastWord = wordCandidate;
        }
        if (length + setLength <= presuffix.UTF8Length()) {
          if (SUFFIX) {
-          const auto& wordSuffix = presuffix.SubString(length, setLength);
+          const auto& wordSuffix = presuffix.SubString(length8Bit, setLength8Bit);
            adjacentSet[wordSuffix]++;
          } else {
            const auto& wordPrefix = presuffix.SubString(
-              presuffix.UTF8Length() - length - setLength, setLength);
+              presuffix.UTF8Length() - length8Bit - setLength8Bit, setLength8Bit);
            adjacentSet[wordPrefix]++;
          }
        }
@@ -393,7 +398,7 @@ double PhraseExtract::CalculateCohesion(
      const UTF8StringSlice8Bit& wordCandidate) const {
    // TODO Try average value
    double minPMI = INFINITY;
-  for (LengthType leftLength = 1; leftLength <= wordCandidate.UTF8Length() - 1;
+  for (UTF8StringSlice8Bit::LengthType leftLength = 1; leftLength <= wordCandidate.UTF8Length() - 1;
         leftLength++) {
      const auto& leftPart = wordCandidate.Left(leftLength);
      const auto& rightPart =
diff --git a/src/PhraseExtract.hpp b/src/PhraseExtract.hpp

index 4b11bd85992559b318ed59a85af09df0f0f0c5d7..5397c863050be6809e20dcb84a32231458ebe11e 100644 (file)
--- a/src/PhraseExtract.hpp
+++ b/src/PhraseExtract.hpp
@@ -25,7 +25,7 @@
  
  namespace opencc {
  
-class PhraseExtract {
+class OPENCC_EXPORT PhraseExtract {
  public:
    typedef UTF8StringSlice::LengthType LengthType;
  
diff --git a/src/SimpleConverter.cpp b/src/SimpleConverter.cpp

index b3dfb6000dfad190259df1bda13ac0f1ddc89fa4..a8e2ff60cf9d20c651a90c5dfaab168c0f588379 100644 (file)
--- a/src/SimpleConverter.cpp
+++ b/src/SimpleConverter.cpp
@@ -133,14 +133,9 @@ opencc_t opencc_open(const char* configFileName) {
  #endif
  
  int opencc_close(opencc_t opencc) {
-  try {
-    SimpleConverter* instance = reinterpret_cast<SimpleConverter*>(opencc);
-    delete instance;
-    return 0;
-  } catch (std::exception& ex) {
-    cError = ex.what();
-    return 1;
-  }
+  SimpleConverter* instance = reinterpret_cast<SimpleConverter*>(opencc);
+  delete instance;
+  return 0;
  }
  
  size_t opencc_convert_utf8_to_buffer(opencc_t opencc, const char* input,
diff --git a/src/UTF8StringSlice.hpp b/src/UTF8StringSlice.hpp

index 1a18b4ba9749a3483a4f236bab4aa2b41c58787c..960063138097c7e5b5cbfa5e0203d73aed50d965 100644 (file)
--- a/src/UTF8StringSlice.hpp
+++ b/src/UTF8StringSlice.hpp
@@ -40,10 +40,12 @@ inline size_t FNVHash<4>(const char* text, const size_t byteLength) {
    return FNVHash(text, byteLength, 16777619UL, 2166136261UL);
  }
  
+#if SIZE_MAX == 0xffffffffffffffff
  template <>
  inline size_t FNVHash<8>(const char* text, const size_t byteLength) {
    return FNVHash(text, byteLength, 1099511628211UL, 14695981039346656037UL);
  }
+#endif
  
  } // namespace internal
  
@@ -52,8 +54,8 @@ public:
    typedef LENGTH_TYPE LengthType;
  
    UTF8StringSliceBase(const char* _str)
-      : str(_str), utf8Length(UTF8Util::Length(_str)),
-        byteLength(strlen(_str)) {}
+      : str(_str), utf8Length(static_cast<LengthType>(UTF8Util::Length(_str))),
+        byteLength(static_cast<LengthType>(strlen(_str))) {}
  
    UTF8StringSliceBase(const char* _str, const LengthType _utf8Length)
        : str(_str), utf8Length(_utf8Length) {
@@ -70,36 +72,36 @@ public:
  
    LengthType ByteLength() const { return byteLength; }
  
-  UTF8StringSliceBase Left(const LengthType utf8Length) const {
-    if (utf8Length == UTF8Length()) {
+  UTF8StringSliceBase Left(const LengthType numberOfCharacters) const {
+    if (numberOfCharacters == UTF8Length()) {
        return *this;
      } else {
-      return UTF8StringSliceBase(str, utf8Length);
+      return UTF8StringSliceBase(str, numberOfCharacters);
      }
    }
  
-  UTF8StringSliceBase Right(const LengthType utf8Length) const {
-    if (utf8Length == UTF8Length()) {
+  UTF8StringSliceBase Right(const LengthType numberOfCharacters) const {
+    if (numberOfCharacters == UTF8Length()) {
        return *this;
      } else {
        const char* pstr = str + byteLength;
-      for (size_t i = 0; i < utf8Length; i++) {
+      for (size_t i = 0; i < numberOfCharacters; i++) {
          pstr = UTF8Util::PrevChar(pstr);
        }
-      return UTF8StringSliceBase(pstr, utf8Length);
+      return UTF8StringSliceBase(pstr, numberOfCharacters);
      }
    }
  
    UTF8StringSliceBase SubString(const LengthType offset,
-                                const LengthType utf8Length) const {
+                                const LengthType numberOfCharacters) const {
      if (offset == 0) {
-      return Left(utf8Length);
+      return Left(numberOfCharacters);
      } else {
        const char* pstr = str;
        for (size_t i = 0; i < offset; i++) {
          pstr = UTF8Util::NextChar(pstr);
        }
-      return UTF8StringSliceBase(pstr, utf8Length);
+      return UTF8StringSliceBase(pstr, numberOfCharacters);
      }
    }
  
@@ -223,7 +225,7 @@ private:
      for (size_t i = 0; i < utf8Length; i++) {
        pstr = UTF8Util::NextChar(pstr);
      }
-    byteLength = pstr - str;
+    byteLength = static_cast<LengthType>(pstr - str);
    }
  
    const char* str;
diff --git a/src/UTF8Util.hpp b/src/UTF8Util.hpp

index ea03dc015bb46df3a28cdd5361564cc9bc8d5b36..70bbf83b8a8a0d4956cac72a13b74498814cc8cf 100644 (file)
--- a/src/UTF8Util.hpp
+++ b/src/UTF8Util.hpp
@@ -262,20 +262,22 @@ public:
  #ifdef _MSC_VER
    static std::string U16ToU8(const std::wstring& wstr) {
      std::string ret;
-    int convcnt = WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), wstr.length(), NULL, 0, NULL, NULL);
+    int length = static_cast<int>(wstr.length());
+    int convcnt = WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), length, NULL, 0, NULL, NULL);
      if (convcnt > 0) {
        ret.resize(convcnt);
-      WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), wstr.length(), &ret[0], convcnt, NULL, NULL);
+      WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), length, &ret[0], convcnt, NULL, NULL);
      }
      return ret;
    }
  
    static std::wstring U8ToU16(const std::string& str) {
      std::wstring ret;
-    int convcnt = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), str.length(), NULL, 0);
+    int length = static_cast<int>(str.length());
+    int convcnt = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), length, NULL, 0);
      if (convcnt > 0) {
        ret.resize(convcnt);
-      MultiByteToWideChar(CP_UTF8, 0, str.c_str(), str.length(), &ret[0], convcnt);
+      MultiByteToWideChar(CP_UTF8, 0, str.c_str(), length, &ret[0], convcnt);
      }
      return ret;
    }
diff --git a/src/tools/DictConverter.cpp b/src/tools/DictConverter.cpp

index 63838302dbdc3ccaed4c9367175d10fd5e6f3293..868e16d41cb6acd112f6ff7d083245a2e18a3128 100644 (file)
--- a/src/tools/DictConverter.cpp
+++ b/src/tools/DictConverter.cpp
@@ -17,43 +17,10 @@
   */
  
  #include "CmdLineOutput.hpp"
-#include "DartsDict.hpp"
-#include "TextDict.hpp"
+#include "DictConverter.hpp"
  
  using namespace opencc;
  
-DictPtr LoadDictionary(const string& format, const string& inputFileName) {
-  if (format == "text") {
-    return SerializableDict::NewFromFile<TextDict>(inputFileName);
-  } else if (format == "ocd") {
-    return SerializableDict::NewFromFile<DartsDict>(inputFileName);
-  } else {
-    fprintf(stderr, "Unknown dictionary format: %s\n", format.c_str());
-    exit(2);
-  }
-  return nullptr;
-}
-
-SerializableDictPtr ConvertDictionary(const string& format,
-                                      const DictPtr dict) {
-  if (format == "text") {
-    return TextDict::NewFromDict(*dict.get());
-  } else if (format == "ocd") {
-    return DartsDict::NewFromDict(*dict.get());
-  } else {
-    fprintf(stderr, "Unknown dictionary format: %s\n", format.c_str());
-    exit(2);
-  }
-  return nullptr;
-}
-
-void ConvertDictionary(const string inputFileName, const string outputFileName,
-                       const string formatFrom, const string formatTo) {
-  DictPtr dictFrom = LoadDictionary(formatFrom, inputFileName);
-  SerializableDictPtr dictTo = ConvertDictionary(formatTo, dictFrom);
-  dictTo->SerializeToFile(outputFileName);
-}
-
  int main(int argc, const char* argv[]) {
    try {
      TCLAP::CmdLine cmd("Open Chinese Convert (OpenCC) Dictionary Tool", ' ',
author	Boyuan Yang <073plan@gmail.com>
	Mon, 9 Oct 2017 13:42:06 +0000 (21:42 +0800)
committer	Boyuan Yang <073plan@gmail.com>
	Mon, 9 Oct 2017 13:42:06 +0000 (21:42 +0800)
CMakeLists.txt		patch \| blob \| history
NEWS.md		patch \| blob \| history
README.md		patch \| blob \| history
binding.gyp		patch \| blob \| history
data/CMakeLists.txt		patch \| blob \| history
data/dictionary/STCharacters.txt		patch \| blob \| history
data/dictionary/STPhrases.txt		patch \| blob \| history
data/dictionary/TWPhrasesOther.txt		patch \| blob \| history
data/dictionary/TWVariants.txt		patch \| blob \| history
data/scheme/st_multi.txt		patch \| blob \| history
doc/CMakeLists.txt		patch \| blob \| history
node/binding.cc		patch \| blob \| history
node/demo.js		patch \| blob \| history
node/dict.js	[new file with mode: 0644]	patch \| blob
node/dicts.gypi		patch \| blob \| history
node/global.gypi		patch \| blob \| history
node/node_binding.gypi		patch \| blob \| history
node/opencc.js		patch \| blob \| history
node/opencc_dict.gypi	[deleted file]	patch \| blob \| history
package.json		patch \| blob \| history
src/BinaryDict.cpp		patch \| blob \| history
src/CMakeLists.txt		patch \| blob \| history
src/Config.cpp		patch \| blob \| history
src/ConfigTest.cpp		patch \| blob \| history
src/DartsDict.cpp		patch \| blob \| history
src/Dict.cpp		patch \| blob \| history
src/DictConverter.cpp	[new file with mode: 0644]	patch \| blob
src/DictConverter.hpp	[new file with mode: 0644]	patch \| blob
src/DictEntry.hpp		patch \| blob \| history
src/Exception.hpp		patch \| blob \| history
src/PhraseExtract.cpp		patch \| blob \| history
src/PhraseExtract.hpp		patch \| blob \| history
src/SimpleConverter.cpp		patch \| blob \| history
src/UTF8StringSlice.hpp		patch \| blob \| history
src/UTF8Util.hpp		patch \| blob \| history
src/tools/DictConverter.cpp		patch \| blob \| history